# -*- coding: utf-8 -*-
import scrapy
import re


class ZhihuSpider(scrapy.Spider):
    """Spider that signs in to zhihu.com before crawling.

    Flow: ``start_requests`` fetches the sign-in page, ``login`` scrapes the
    ``_xsrf`` token out of that page and posts the login form, and
    ``check_login`` receives the server's answer to the login attempt.
    """

    name = 'zhihu'
    allowed_domains = ['www.zhihu.com']
    start_urls = ['http://www.zhihu.com/']

    # Headers sent with every request issued by this spider.
    headers = {
        "HOST": "www.zhihu.com",
        "Referer": "https://www.zhihu.com/",
        "User-Agent": (
            "Mozilla/5.0 (Windows NT 6.1; Win64; "
            "x64) AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/61.0.3163.100 Safari/537.36"
        ),
    }

    # Endpoint the login form is posted to.
    # NOTE(review): "email_num" combined with a "phone_num" form key that
    # carries an e-mail address looks inconsistent -- confirm the endpoint
    # and the field name against the site's actual login API.
    login_url = "https://www.zhihu.com/login/email_num"

    # SECURITY: credentials are hard-coded in source. They are kept as class
    # attributes so they can be overridden with Scrapy spider arguments
    # (``-a login_account=... -a login_password=...``), but they should be
    # moved to settings or the environment and rotated.
    login_account = '18755132178@163.com'
    login_password = '448537056zhihu'

    # Captures the value of the hidden "_xsrf" form field. The leading
    # greedy ".*" together with DOTALL makes this pick the LAST occurrence
    # in the page.
    _XSRF_RE = re.compile(r'.*name="_xsrf" value="(.*?)"', re.DOTALL)

    def parse(self, response):
        """Default callback; intentionally does nothing yet."""
        pass

    def start_requests(self):
        """Start the crawl at the sign-in page and hand it to ``login``."""
        return [scrapy.Request("https://www.zhihu.com/#signin",
                               headers=self.headers,
                               callback=self.login)]

    def login(self, response):
        """Extract the ``_xsrf`` token from the page and post the login form.

        Returns a list with a single ``FormRequest`` whose response goes to
        ``check_login``, or an empty list when no usable token was found.
        """
        match_obj = self._XSRF_RE.match(response.text)
        if match_obj is None:
            # A callback must yield requests/items (or nothing); returning a
            # plain string would make Scrapy iterate it character by
            # character and reject every element.
            self.logger.error("login aborted: _xsrf field not found in %s",
                              response.url)
            return []

        xsrf = match_obj.group(1)
        if not xsrf:
            # Without this guard the form data would never be built and the
            # request construction below would fail on undefined names.
            self.logger.error("login aborted: empty _xsrf value in %s",
                              response.url)
            return []

        post_data = {
            "_xsrf": xsrf,
            "phone_num": self.login_account,
            "password": self.login_password,
        }
        return [scrapy.FormRequest(url=self.login_url,
                                   formdata=post_data,
                                   headers=self.headers,
                                   callback=self.check_login)]

    def check_login(self, response):
        """Callback for the login POST; not implemented yet.

        Intended to inspect the data returned by the server to decide
        whether the login succeeded.
        """
        pass